# -*- coding:utf-8 -*-

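# The "recently updated e-books" section on jb51 is no longer refreshed, but new e-books are in fact still being added to the site.
# This script walks the book pages one ID at a time to find the newly added e-books.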

import requests
import time
import lxml.etree

head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0'
}

# First book ID to probe; IDs are walked upwards from here, one at a time.
START_ID = 979201
# Seconds to pause between two consecutive requests.
REQUEST_DELAY = 0.5
# Seconds after which a stalled request is abandoned. Without a timeout,
# requests waits indefinitely and a single dead connection hangs the scan.
REQUEST_TIMEOUT = 10
# Absolute XPath of the text of the <h1> that holds the book title.
TITLE_XPATH = '/html/body/div[4]/div[1]/div[3]/div[1]/h1/text()'
# Printed in place of the title when a page exists but has no title node.
MISSING_TITLE = "<title not found>"


def book_url(book_id):
    """Return the detail-page URL for the numeric book ID *book_id*."""
    return "https://www.jb51.net/books/" + str(book_id) + ".html"


def fetch_title(session, book_id):
    """Fetch the page of one book and return its title.

    Args:
        session: a ``requests.Session`` used to issue the GET request.
        book_id: numeric ID of the book page to fetch.

    Returns:
        ``None`` when the server answers 404 (no such book); otherwise the
        title text, or an empty string when the page has no node matching
        ``TITLE_XPATH``.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    r = session.get(book_url(book_id), headers=head, timeout=REQUEST_TIMEOUT)
    if r.status_code == 404:
        return None

    # Let requests guess the charset from the body instead of trusting the
    # response headers.
    r.encoding = r.apparent_encoding
    text = r.text
    if not text.strip():
        # Nothing to parse; avoid handing an empty document to lxml.
        return ""

    html = lxml.etree.HTML(text)
    if html is None:
        return ""

    title = html.xpath(TITLE_XPATH)
    # The XPath is tied to the page layout; a page that deviates from it
    # yields an empty list, which must not crash the scan.
    return title[0] if title else ""


def main(start_id=START_ID, delay=REQUEST_DELAY):
    """Probe book pages from *start_id* upwards forever, printing each title.

    Every ID that does not answer 404 is printed together with its title
    (or a placeholder when no title could be extracted). Network failures
    are reported and the scan continues with the next ID. The loop only
    stops when the process is interrupted.
    """
    i = start_id
    # One session for the whole scan so connections can be reused.
    with requests.Session() as session:
        while True:
            try:
                title = fetch_title(session, i)
            except requests.RequestException as err:
                print(i, "request failed:", err)
            else:
                if title is not None:
                    print(i, title or MISSING_TITLE)

            i = i + 1
            time.sleep(delay)


if __name__ == "__main__":
    main()
